library(dplyr)
library(ggplot2)
library(purrr)
library(tibble)
library(tidyr)
library(stringr)
In this notebook, the average tensor values extracted across ROIs are plotted before filtering and aggregation of the data. All regions are included.
Abbreviations:
White Matter (WM)
Grey Matter (GM)
Cerebrospinal Fluid (CSF)
Ventricles (VN)
Fractional Anisotropy (FA)
Mean Diffusivity (MD)
Axial Diffusivity (AD)
Radial Diffusivity (RD)
Colors in the plots represent distinct subjects.
# Read the per-ROI tensor averages; `roi` is forced to character so its label
# codes can be recoded to region names later on.
seg_df <- read.csv("avg_tensor_by_roi.csv", colClasses = c(roi = "character"))

# Derive identifiers from the `sub` string and place them right after it:
#   subject - the five digits that follow "sub-"
#   site    - the run of non-digits directly after that five-digit ID
#   visit   - the two digits that end the string
seg_df <- mutate(
  seg_df,
  subject = str_extract(sub, "(?<=sub-)\\d{5}"),
  site = str_extract(sub, "(?<=sub-\\d{5})\\D+"),
  visit = str_extract(sub, "[0-9]{2}$"),
  .after = sub
)

# Drop the first column (presumably a row index stored in the CSV - confirm).
seg_df <- seg_df[-1]

# ROI dictionary, restricted to the entries with ROI_INDEX 1 through 207.
jlf_dict <- filter(
  readxl::read_xlsx("/home/vgonzenb/MSKIDS/data/MUSE_ROI_Dict.xlsx"),
  ROI_INDEX %in% 1:207
)
#' Name ROIs according to Segmentation
#'
#' Recodes the `roi` column of `df` with the label-to-name lookup that belongs
#' to `seg_type`, which allows a distinct recoding scheme per value of the
#' 'segmentation' column. For usage, split or filter a data.frame by unique
#' segmentations and indicate the segmentation type.
#'
#' The 'jlfseg_WMGM' lookup is built from the `jlf_dict` object
#' (`ROI_INDEX` -> `ROI_NAME`), which must already exist when this function
#' is called.
#'
#' @param df a data.frame corresponding to data from one segmentation type
#'   only; it must contain a `roi` column
#' @param seg_type the segmentation type: 'atropos', 'fast', 'first',
#'   'jlfseg_WMGM', or 'jlfseg_thal'. Any other value raises an error.
#' @return a data.frame with a recoded roi column
#' @examples
#' \dontrun{
#' # all segmentations at once
#' multiseg_df %>%
#'   split(multiseg_df$segmentation) %>%
#'   purrr::imap(name_rois) %>%
#'   dplyr::bind_rows()
#'
#' # a single segmentation
#' multiseg_df %>% filter(segmentation == seg_type) %>% name_rois(seg_type)
#' }
name_rois <- function(df, seg_type) {
  roi_names_by_seg_type <- list(
    atropos = c(`1` = "CSF", `2` = "GM", `3` = "WM"),
    fast = c(`1` = "CSF", `2` = "GM", `3` = "WM"),
    first = c(
      `10` = "L. Thalamus",
      `11` = "L. Caudate",
      `12` = "L. Putamen",
      `13` = "L. Pallidum",
      `16` = "Brain-Stem/4th Ventricle",
      `17` = "L. Hippocampus",
      `18` = "L. Amygdala",
      `26` = "L. Accumbens-area",
      `49` = "R. Thalamus",
      `50` = "R. Caudate",
      `51` = "R. Putamen",
      `52` = "R. Pallidum",
      `53` = "R. Hippocampus",
      `54` = "R. Amygdala",
      `58` = "R. Accumbens-area"
    ),
    jlfseg_WMGM = jlf_dict$ROI_NAME |> setNames(jlf_dict$ROI_INDEX),
    jlfseg_thal = c(`1` = 'Thalamus')
  )

  # Fail with a readable message instead of letting recode() complain about
  # missing replacements when the lookup for `seg_type` does not exist.
  if (length(seg_type) != 1L || !seg_type %in% names(roi_names_by_seg_type)) {
    stop(
      "Unknown segmentation type: '", paste(seg_type, collapse = "', '"),
      "'. Expected one of: ",
      paste(names(roi_names_by_seg_type), collapse = ", "),
      call. = FALSE
    )
  }

  roi_names <- roi_names_by_seg_type[[seg_type]]

  # Splice the named vector into recode() as old = new pairs.
  df %>%
    mutate(roi = recode(roi, !!!roi_names))
}
# Recode ROI labels one segmentation at a time (imap hands name_rois each
# piece together with its segmentation name), then stack the pieces again.
seg_df <- bind_rows(
  imap(split(seg_df, seg_df$segmentation), name_rois)
)
# name tissue types
#' Add a tissue column according to Segmentation
#'
#' Creates a `tissue` column, placed directly after `roi`, by recoding the
#' (already named) `roi` values with the ROI-name-to-tissue lookup that
#' belongs to `seg_type`.
#'
#' The 'jlfseg_WMGM' lookup is built from the `jlf_dict` object
#' (`ROI_NAME` -> `TISSUE_SEG`), which must already exist when this function
#' is called.
#'
#' @param df a data.frame corresponding to data from one segmentation type
#'   only; it must contain a `roi` column holding ROI names
#' @param seg_type the segmentation type: 'atropos', 'fast', 'first',
#'   'jlfseg_WMGM', or 'jlfseg_thal'. Any other value raises an error.
#' @return a data.frame with an added tissue column
add_tissue_col <- function(df, seg_type) {
  tissues_by_seg_type <- list(
    atropos = setNames(nm = c("CSF", "GM", "WM")),
    fast = setNames(nm = c("CSF", "GM", "WM")),
    # Keys must match the names assigned by name_rois(), which are written
    # with a space after the side prefix ("L. Thalamus", "R. Thalamus").
    first = setNames(
      c("Thalamus", "Thalamus"),
      c("L. Thalamus", "R. Thalamus")
    ),
    jlfseg_WMGM = jlf_dict$TISSUE_SEG |> setNames(jlf_dict$ROI_NAME),
    jlfseg_thal = c('Thalamus' = 'Thalamus')
  )

  # Fail with a readable message instead of letting recode() complain about
  # missing replacements when the lookup for `seg_type` does not exist.
  if (length(seg_type) != 1L || !seg_type %in% names(tissues_by_seg_type)) {
    stop(
      "Unknown segmentation type: '", paste(seg_type, collapse = "', '"),
      "'. Expected one of: ",
      paste(names(tissues_by_seg_type), collapse = ", "),
      call. = FALSE
    )
  }

  tissue_type <- tissues_by_seg_type[[seg_type]]

  df %>%
    mutate(tissue = recode(roi, !!!tissue_type), .after = "roi")
}
# Attach the tissue column one segmentation at a time (imap hands
# add_tissue_col each piece together with its segmentation name), then stack
# the pieces again.
seg_df <- bind_rows(
  imap(split(seg_df, seg_df$segmentation), add_tissue_col)
)
# plot each segmentation
#' Plot tensor values per ROI for one subset of the data
#'
#' Draws one horizontal boxplot per ROI (outlier markers suppressed) with the
#' individual observations jittered on top and colored by subject. The legend
#' is hidden.
#'
#' @param mini_df a data.frame with `roi`, `subject` and `values` columns
#' @param title accepted so the function can be used directly with
#'   purrr::imap(), which supplies the element name as second argument; it is
#'   not drawn on the plot
#' @return a ggplot object
plot_seg <- function(mini_df, title = NULL) {
  ggplot(mini_df, aes(group = roi, x = roi, color = subject, y = values)) +
    geom_boxplot(outlier.shape = NA, alpha = 0.4) +
    geom_jitter() +
    coord_flip() +
    theme(legend.position = "none")
}
# One plot per segmentation x tensor map combination, for every segmentation
# except jlfseg_WMGM.
plot_list <- filter(seg_df, segmentation != "jlfseg_WMGM") %>%
  split(list(.$segmentation, .$tensormap)) %>%
  purrr::imap(plot_seg)

# Emit a level-3 heading followed by the plot for each combination. The
# heading is the list name with its first "." and first "_" turned into
# spaces, in upper case.
for (plot_name in names(plot_list)) {
  header <- str_to_upper(
    str_replace(str_replace(plot_name, "\\.", " "), "_", " ")
  )
  cat("###", header, "\n")
  print(plot_list[[plot_name]])
  cat("\n\n")
}
# JLF segmentation, every tissue except GM: one plot per tissue x tensor map.
# ROI labels 46, 63, 64 and 69 are left out.
plot_list <- filter(
  seg_df,
  segmentation == "jlfseg_WMGM",
  !roi %in% c("46", "63", "64", "69"),
  tissue != "GM"
) %>%
  split(list(.$tissue, .$tensormap)) %>%
  purrr::imap(plot_seg)

# Emit a level-3 "JLF ..." heading followed by the plot; the heading is the
# list name with its first "." turned into a space, in upper case.
for (plot_name in names(plot_list)) {
  header <- str_to_upper(str_replace(plot_name, "\\.", " "))
  cat("###", "JLF", header, "\n")
  print(plot_list[[plot_name]])
  cat("\n\n")
}
# JLF segmentation, GM only. ROI labels 46, 63, 64 and 69 are left out.
jlf_gm_df <- seg_df %>%
  filter(segmentation == 'jlfseg_WMGM', !roi %in% c('46', '63', '64', '69'), tissue == 'GM')

# Spread the GM ROIs over panels of at most 20 ROIs so each plot stays
# readable. The panel is derived from the ROI itself (its order of first
# appearance) rather than from the row position, so every row of a given ROI
# lands in the same panel whatever the row order or the number of ROIs.
rois_per_panel <- 20L
roi_panel <- (match(jlf_gm_df$roi, unique(jlf_gm_df$roi)) - 1L) %/% rois_per_panel + 1L

# One plot per panel x tensor map combination.
plot_list <- jlf_gm_df %>%
  split(list(roi_panel, jlf_gm_df$tensormap)) %>%
  purrr::imap(plot_seg)

# Emit a level-3 "JLF GM ..." heading followed by the plot; the leading panel
# number and dot are stripped from the list name, leaving the tensor map.
for (plot_name in names(plot_list)) {
  header <- plot_name %>%
    str_replace("[0-9]+\\.", "") %>%
    str_to_upper()
  cat("###", "JLF GM", header, "\n")
  print(plot_list[[plot_name]])
  cat('\n\n')
}
Excluding non-brain (label 0) and irrelevant segmentations for FIRST. JLF shows four labels (46, 63, 64, 69) that are not in the current data dictionary. This might lead to missing information on these regions.
# List one example row per segmentation x tissue for the rows whose tissue is
# made up of digits only (presumably labels that recoding left untranslated).
# Only the FA rows are used, to keep the listing short, and roi 0 is skipped.
seg_df %>%
  mutate(exclude = str_detect(tissue, "^[0-9]+$")) %>%
  filter(exclude, tensormap == "FA") %>%
  group_by(segmentation, tissue) %>%
  filter(row_number() == 1L, roi != 0) %>%
  select(-c(tensormap, tissue))
## Adding missing grouping variables: `tissue`
# List the rows whose tissue class is NONE, CSF or VN, flagged for exclusion.
# The flag is computed from these tissue names directly: requiring an
# all-digit tissue as well as one of these three names can never be satisfied
# by any row, so combining both conditions always produced an empty table.
# NOTE(review): confirm that listing the NONE/CSF/VN rows is the intended check.
seg_df %>%
  mutate(exclude = tissue %in% c("NONE", "CSF", "VN")) %>%
  filter(exclude)